# -*- coding:utf-8 -*-
# @Author: shenyuyu
# @Time: 2023/6/15 19:35
# @File: 爬虫2.py

import re
import requests
import json
from openpyxl import Workbook

SOURCE_URL = "http://www.gaosan.com/gaokao/583902.html"
OUTPUT_PATH = "3.xlsx"
REQUEST_TIMEOUT = 10  # seconds; without a timeout requests.get can block forever

# The data cells this script reads are written as `<td x:="">...</td>`.
# Raw strings keep `\d` a regex escape instead of an invalid string escape.
_NUMERIC_CELL = re.compile(r'<td x:="">\d+</td>')
_TEXT_CELL = re.compile(r'<td x:="">(.*?)</td>', re.S)
# The first two remaining cells are skipped (presumably the table's header row).
_LEADING_CELLS_TO_SKIP = 2


def extract_universities(html):
    """Return the university names found in *html*, in page order.

    Asterisks are stripped from the text, cells that contain only digits are
    removed, and the first two remaining cells are skipped. An input with
    fewer than two matching cells yields an empty list instead of raising.
    """
    cleaned = _NUMERIC_CELL.sub("", html.replace("*", ""))
    return _TEXT_CELL.findall(cleaned)[_LEADING_CELLS_TO_SKIP:]


def save_ranking(universities, path=OUTPUT_PATH):
    """Write *universities* to an Excel workbook at *path*.

    Row 1 holds the column titles; each following row holds the 1-based
    position of the name in the list and the name itself.
    """
    wb = Workbook()
    sheet = wb.active
    sheet["A1"] = "排名"
    sheet["B1"] = "大学"

    for rank, name in enumerate(universities, start=1):
        row = str(rank + 1)  # data starts on row 2, below the titles
        sheet["A" + row] = rank
        sheet["B" + row] = name

    wb.save(path)


def main():
    """Fetch the ranking page, extract the names and save them to Excel."""
    response = requests.get(SOURCE_URL, timeout=REQUEST_TIMEOUT)
    print(response)
    # Fail loudly on 4xx/5xx rather than parsing an error page as data.
    response.raise_for_status()

    content = extract_universities(response.text)
    print(content)

    save_ranking(content)


if __name__ == "__main__":
    main()

# l = []
#
# for i in range(0, len(content)):
#     d = {}
#     d["rank"] = i + 1
#     d["university"] = content[i]
#     l.append(d)
#
# dumps = json.dumps(l, ensure_ascii=False)
# with open("2.txt", "w", encoding="utf8") as f:
#     f.write(dumps)
